# Load the county data, clean the column names with janitor, and derive
# crime_rate as the number of crimes per 1,000 residents.
cdi <- read_csv("./cdi.csv") %>%
  janitor::clean_names() %>%
  mutate(crime_rate = crimes / pop * 1000)
Note that the distributions shown in the plots are not normal; they are all skewed.
#shapiro.test(cdi$totalinc)
# Crime rate vs. total personal income: blue points with a red fitted
# line and its 95% confidence band.
ggplot(cdi, aes(x = totalinc, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Total personal income", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model crime_rate ~ totalinc and print its summary.
reg_totalinc <- lm(crime_rate ~ totalinc, data = cdi)
summary(reg_totalinc)
##
## Call:
## lm(formula = crime_rate ~ totalinc, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.901 -18.609 -4.135 15.033 223.801
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.348e+01 1.488e+00 35.929 < 2e-16 ***
## totalinc 4.839e-04 9.867e-05 4.904 1.32e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.64 on 438 degrees of freedom
## Multiple R-squared: 0.05206, Adjusted R-squared: 0.04989
## F-statistic: 24.05 on 1 and 438 DF, p-value: 1.324e-06
There is a significant increasing linear relationship between crime rate and total personal income.
# Crime rate vs. per capita income: blue points with a red fitted line
# and its 95% confidence band.
ggplot(cdi, aes(x = pcincome, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Per capita income", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model crime_rate ~ pcincome and print its summary.
reg_pcincome <- lm(crime_rate ~ pcincome, data = cdi)
summary(reg_pcincome)
##
## Call:
## lm(formula = crime_rate ~ pcincome, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.506 -18.584 -3.998 14.791 237.750
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.3138796 6.0919784 11.050 <2e-16 ***
## pcincome -0.0005402 0.0003206 -1.685 0.0927 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.27 on 438 degrees of freedom
## Multiple R-squared: 0.006439, Adjusted R-squared: 0.004171
## F-statistic: 2.839 on 1 and 438 DF, p-value: 0.09274
There is NO significant linear relationship between crime rate and per capita income.
# Crime rate vs. percent unemployment: blue points with a red fitted line
# and its 95% confidence band.
ggplot(cdi, aes(x = unemp, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent unemployment", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model crime_rate ~ unemp and print its summary.
reg_unemp <- lm(crime_rate ~ unemp, data = cdi)
summary(reg_unemp)
##
## Call:
## lm(formula = crime_rate ~ unemp, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.295 -19.243 -4.865 15.409 237.280
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.0598 3.9049 13.844 <2e-16 ***
## unemp 0.4891 0.5580 0.877 0.381
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.33 on 438 degrees of freedom
## Multiple R-squared: 0.001751, Adjusted R-squared: -0.000528
## F-statistic: 0.7683 on 1 and 438 DF, p-value: 0.3812
There is NO significant linear relationship between crime rate and percent unemployment.
# Crime rate vs. percent below poverty level: blue points with a red
# fitted line and its 95% confidence band.
ggplot(cdi, aes(x = poverty, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent below poverty level", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model crime_rate ~ poverty and print its summary.
reg_poverty <- lm(crime_rate ~ poverty, data = cdi)
summary(reg_poverty)
##
## Call:
## lm(formula = crime_rate ~ poverty, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.008 -14.578 -2.561 13.605 208.853
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.1390 2.4435 13.56 <2e-16 ***
## poverty 2.7690 0.2472 11.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.12 on 438 degrees of freedom
## Multiple R-squared: 0.2226, Adjusted R-squared: 0.2209
## F-statistic: 125.4 on 1 and 438 DF, p-value: < 2.2e-16
There is a significant increasing linear relationship between crime rate and percent below poverty level.
# Crime rate vs. percent bachelor's degrees: blue points with a red
# fitted line and its 95% confidence band.
ggplot(cdi, aes(x = bagrad, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent bachelor’s degrees", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model crime_rate ~ bagrad and print its summary.
reg_bagrad <- lm(crime_rate ~ bagrad, data = cdi)
summary(reg_bagrad)
##
## Call:
## lm(formula = crime_rate ~ bagrad, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.264 -19.407 -4.478 15.727 239.313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.4035 3.8226 14.232 <2e-16 ***
## bagrad 0.1368 0.1705 0.802 0.423
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.34 on 438 degrees of freedom
## Multiple R-squared: 0.001467, Adjusted R-squared: -0.0008125
## F-statistic: 0.6436 on 1 and 438 DF, p-value: 0.4228
There is NO significant linear relationship between crime rate and percent bachelor’s degrees.
# Crime rate vs. percent high school graduates: blue points with a red
# fitted line and its 95% confidence band.
ggplot(cdi, aes(x = hsgrad, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent high school graduates", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model crime_rate ~ hsgrad and print its summary.
reg_hsgrad <- lm(crime_rate ~ hsgrad, data = cdi)
summary(reg_hsgrad)
##
## Call:
## lm(formula = crime_rate ~ hsgrad, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -54.07 -18.46 -3.64 16.37 226.47
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 125.6947 14.1191 8.902 < 2e-16 ***
## hsgrad -0.8820 0.1813 -4.865 1.6e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.65 on 438 degrees of freedom
## Multiple R-squared: 0.05126, Adjusted R-squared: 0.0491
## F-statistic: 23.67 on 1 and 438 DF, p-value: 1.601e-06
There is a significant decreasing linear relationship between crime rate and percent high school graduates.
We then want to examine whether there is a linear relationship between each pair of variables.
# Total personal income vs. percent high school graduates: blue points
# with a red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = hsgrad, y = totalinc)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent high school graduates", y = "Total personal income") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model totalinc ~ hsgrad and print its summary.
hs_to <- lm(totalinc ~ hsgrad, data = cdi)
summary(hs_to)
##
## Call:
## lm(formula = totalinc ~ hsgrad, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7284 -5452 -4023 530 176963
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1693.20 6827.87 0.248 0.804
## hsgrad 79.63 87.68 0.908 0.364
##
## Residual standard error: 12890 on 438 degrees of freedom
## Multiple R-squared: 0.00188, Adjusted R-squared: -0.0003991
## F-statistic: 0.8249 on 1 and 438 DF, p-value: 0.3643
There is NO significant linear relationship between total personal income and percent high school graduates.
# Percent high school graduates vs. percent below poverty level: blue
# points with a red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = poverty, y = hsgrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent below poverty level", y = "Percent high school graduates") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model hsgrad ~ poverty and print its summary.
po_hs <- lm(hsgrad ~ poverty, data = cdi)
summary(po_hs)
##
## Call:
## lm(formula = hsgrad ~ poverty, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.8748 -2.7290 0.1789 3.4117 12.2977
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 86.64842 0.51375 168.66 <2e-16 ***
## poverty -1.04209 0.05198 -20.05 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.072 on 438 degrees of freedom
## Multiple R-squared: 0.4785, Adjusted R-squared: 0.4773
## F-statistic: 401.9 on 1 and 438 DF, p-value: < 2.2e-16
There is a significant decreasing linear relationship between percent below poverty level and percent high school graduates.
# Total personal income vs. percent below poverty level: blue points with
# a red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = poverty, y = totalinc)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent below poverty level", y = "Total personal income") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model totalinc ~ poverty and print its summary.
po_to <- lm(totalinc ~ poverty, data = cdi)
summary(po_to)
##
## Call:
## lm(formula = totalinc ~ poverty, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6774 -5513 -4047 725 176669
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8804.0 1305.7 6.743 4.92e-11 ***
## poverty -107.2 132.1 -0.811 0.418
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12890 on 438 degrees of freedom
## Multiple R-squared: 0.001501, Adjusted R-squared: -0.0007789
## F-statistic: 0.6583 on 1 and 438 DF, p-value: 0.4176
There is NO significant linear relationship between total personal income and percent below poverty level.
With other variables
# Percent high school graduates vs. percent unemployment: blue points
# with a red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = unemp, y = hsgrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent unemployment", y = "Percent high school graduates") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model hsgrad ~ unemp and print its summary.
hs_un <- lm(hsgrad ~ unemp, data = cdi)
summary(hs_un)
##
## Call:
## lm(formula = hsgrad ~ unemp, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.0550 -3.1145 0.6782 3.8344 18.8605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 89.3101 0.8074 110.61 <2e-16 ***
## unemp -1.7811 0.1154 -15.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.652 on 438 degrees of freedom
## Multiple R-squared: 0.3524, Adjusted R-squared: 0.3509
## F-statistic: 238.3 on 1 and 438 DF, p-value: < 2.2e-16
There is a significant decreasing linear relationship between percent unemployment and percent high school graduates.
# Percent high school graduates vs. per capita income: blue points with a
# red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = pcincome, y = hsgrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Per capita income", y = "Percent high school graduates") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model hsgrad ~ pcincome and print its summary.
hs_pc <- lm(hsgrad ~ pcincome, data = cdi)
summary(hs_pc)
##
## Call:
## lm(formula = hsgrad ~ pcincome, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.2272 -2.8779 -0.1909 3.8914 17.0099
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.078e+01 1.337e+00 45.45 <2e-16 ***
## pcincome 9.038e-04 7.038e-05 12.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.986 on 438 degrees of freedom
## Multiple R-squared: 0.2735, Adjusted R-squared: 0.2719
## F-statistic: 164.9 on 1 and 438 DF, p-value: < 2.2e-16
There is a significant increasing linear relationship between per capita income and percent high school graduates.
# Percent bachelor's degrees vs. percent high school graduates: blue
# points with a red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = hsgrad, y = bagrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent high school graduates", y = "Percent bachelor’s degrees") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model bagrad ~ hsgrad and print its summary.
ba_hs <- lm(bagrad ~ hsgrad, data = cdi)
summary(ba_hs)
##
## Call:
## lm(formula = bagrad ~ hsgrad, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.8819 -4.0177 -0.7579 3.3907 23.5428
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -38.81857 2.86825 -13.53 <2e-16 ***
## hsgrad 0.77229 0.03683 20.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.414 on 438 degrees of freedom
## Multiple R-squared: 0.501, Adjusted R-squared: 0.4998
## F-statistic: 439.7 on 1 and 438 DF, p-value: < 2.2e-16
There is a significant increasing linear relationship between percent bachelor’s degrees and percent high school graduates.
# Percent bachelor's degrees vs. total personal income: blue points with
# a red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = totalinc, y = bagrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Total personal income", y = "Percent bachelor’s degrees") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model bagrad ~ totalinc and print its summary.
ba_to <- lm(bagrad ~ totalinc, data = cdi)
summary(ba_to)
##
## Call:
## lm(formula = bagrad ~ totalinc, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.065 -5.559 -1.339 4.034 31.575
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.004e+01 4.175e-01 48.00 < 2e-16 ***
## totalinc 1.320e-04 2.768e-05 4.77 2.51e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.472 on 438 degrees of freedom
## Multiple R-squared: 0.04939, Adjusted R-squared: 0.04722
## F-statistic: 22.75 on 1 and 438 DF, p-value: 2.511e-06
There is a significant increasing linear relationship between percent bachelor’s degrees and total personal income.
# Per capita income vs. total personal income: blue points with a red
# fitted line and its 95% confidence band.
ggplot(cdi, aes(x = totalinc, y = pcincome)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Total personal income", y = "Per capita income") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model pcincome ~ totalinc and print its summary.
to_pc <- lm(pcincome ~ totalinc, data = cdi)
summary(to_pc)
##
## Call:
## lm(formula = pcincome ~ totalinc, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17093.5 -2115.3 -683.8 1559.1 18895.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.770e+04 2.129e+02 83.129 < 2e-16 ***
## totalinc 1.095e-01 1.411e-02 7.761 6.01e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3810 on 438 degrees of freedom
## Multiple R-squared: 0.1209, Adjusted R-squared: 0.1189
## F-statistic: 60.23 on 1 and 438 DF, p-value: 6.014e-14
There is a significant increasing linear relationship between per capita income and total personal income.
# Percent unemployment vs. total personal income: blue points with a red
# fitted line and its 95% confidence band.
ggplot(cdi, aes(x = totalinc, y = unemp)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Total personal income", y = "Percent unemployment") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model unemp ~ totalinc and print its summary.
to_un <- lm(unemp ~ totalinc, data = cdi)
summary(to_un)
##
## Call:
## lm(formula = unemp ~ totalinc, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4216 -1.4263 -0.3943 0.8775 14.6648
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.645e+00 1.307e-01 50.836 <2e-16 ***
## totalinc -6.147e-06 8.665e-06 -0.709 0.478
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.339 on 438 degrees of freedom
## Multiple R-squared: 0.001148, Adjusted R-squared: -0.001133
## F-statistic: 0.5032 on 1 and 438 DF, p-value: 0.4785
There is NO significant linear relationship between total personal income and percent unemployment.
# Percent unemployment vs. per capita income: blue points with a red
# fitted line and its 95% confidence band.
ggplot(cdi, aes(x = pcincome, y = unemp)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Per capita income", y = "Percent unemployment") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model unemp ~ pcincome and print its summary.
pc_un <- lm(unemp ~ pcincome, data = cdi)
summary(pc_un)
##
## Call:
## lm(formula = unemp ~ pcincome, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5349 -1.3458 -0.3355 0.9244 13.9541
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.004e+01 4.950e-01 20.284 < 2e-16 ***
## pcincome -1.855e-04 2.605e-05 -7.122 4.4e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.216 on 438 degrees of freedom
## Multiple R-squared: 0.1038, Adjusted R-squared: 0.1017
## F-statistic: 50.72 on 1 and 438 DF, p-value: 4.404e-12
There is a significant decreasing linear relationship between per capita income and percent unemployment.
# Percent below poverty level vs. per capita income: blue points with a
# red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = pcincome, y = poverty)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Per capita income", y = "Percent below poverty level") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model poverty ~ pcincome and print its summary.
pc_po <- lm(poverty ~ pcincome, data = cdi)
summary(pc_po)
##
## Call:
## lm(formula = poverty ~ pcincome, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.8943 -2.6019 -0.6274 1.6855 20.9093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.153e+01 8.318e-01 25.89 <2e-16 ***
## pcincome -6.903e-04 4.378e-05 -15.77 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.724 on 438 degrees of freedom
## Multiple R-squared: 0.3621, Adjusted R-squared: 0.3606
## F-statistic: 248.6 on 1 and 438 DF, p-value: < 2.2e-16
There is a significant decreasing linear relationship between per capita income and percent below poverty level.
# Percent bachelor's degrees vs. per capita income: blue points with a
# red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = pcincome, y = bagrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Per capita income", y = "Percent bachelor’s degrees") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model bagrad ~ pcincome and print its summary.
ba_pc <- lm(bagrad ~ pcincome, data = cdi)
summary(ba_pc)
##
## Call:
## lm(formula = bagrad ~ pcincome, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0102 -3.8563 -0.8948 2.8291 22.4822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.258e+00 1.230e+00 -2.648 0.00839 **
## pcincome 1.311e-03 6.475e-05 20.250 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.507 on 438 degrees of freedom
## Multiple R-squared: 0.4835, Adjusted R-squared: 0.4823
## F-statistic: 410.1 on 1 and 438 DF, p-value: < 2.2e-16
There is a significant increasing linear relationship between per capita income and percent bachelor’s degrees.
# Percent below poverty level vs. percent unemployment: blue points with
# a red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = unemp, y = poverty)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent unemployment", y = "Percent below poverty level") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model poverty ~ unemp and print its summary.
po_un <- lm(poverty ~ unemp, data = cdi)
summary(po_un)
##
## Call:
## lm(formula = poverty ~ unemp, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.1606 -2.9857 -0.4486 2.0124 21.5913
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.97952 0.59906 4.974 9.45e-07 ***
## unemp 0.87032 0.08561 10.166 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.193 on 438 degrees of freedom
## Multiple R-squared: 0.1909, Adjusted R-squared: 0.1891
## F-statistic: 103.4 on 1 and 438 DF, p-value: < 2.2e-16
There is a significant increasing linear relationship between percent unemployment and percent below poverty level.
# Percent bachelor's degrees vs. percent unemployment: blue points with a
# red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = unemp, y = bagrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent unemployment", y = "Percent bachelor's degrees") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model bagrad ~ unemp and print its summary.
ba_un <- lm(bagrad ~ unemp, data = cdi)
summary(ba_un)
##
## Call:
## lm(formula = bagrad ~ unemp, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.163 -4.484 -1.163 3.616 25.912
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.7635 0.9208 35.58 <2e-16 ***
## unemp -1.7710 0.1316 -13.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.445 on 438 degrees of freedom
## Multiple R-squared: 0.2926, Adjusted R-squared: 0.291
## F-statistic: 181.2 on 1 and 438 DF, p-value: < 2.2e-16
There is a significant decreasing linear relationship between percent unemployment and percent bachelor’s degrees.
# Percent bachelor's degrees vs. percent below poverty level: blue points
# with a red fitted line and its 95% confidence band.
ggplot(cdi, aes(x = poverty, y = bagrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent below poverty level", y = "Percent bachelor's degrees") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# Fit the simple linear model bagrad ~ poverty and print its summary.
ba_po <- lm(bagrad ~ poverty, data = cdi)
summary(ba_po)
##
## Call:
## lm(formula = bagrad ~ poverty, data = cdi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.9077 -5.1219 -0.5845 3.8046 28.2510
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.93575 0.70858 38.014 <2e-16 ***
## poverty -0.67135 0.07169 -9.364 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.995 on 438 degrees of freedom
## Multiple R-squared: 0.1668, Adjusted R-squared: 0.1649
## F-statistic: 87.69 on 1 and 438 DF, p-value: < 2.2e-16
There is a significant decreasing linear relationship between percent below poverty level and percent bachelor’s degrees.
Crime rate has a significant linear relationship with total personal income (+), percent below poverty level (+), and percent high school graduates (-).
Total personal income has a significant linear relationship with percent bachelor’s degrees (+) and per capita income (+).
Percent below poverty level has a significant linear relationship with percent high school graduates (-), percent bachelor’s degrees (-), per capita income (-), and percent unemployment (+).
Percent high school graduates has a significant linear relationship with percent below poverty level (-), percent unemployment (-), percent bachelor’s degrees (+), and per capita income (+).